In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
from IPython.display import display_html, HTML
import urllib.request
import glob
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
import numpy as np
from nltk.stem.porter import PorterStemmer
from nltk import word_tokenize
from nltk.stem import WordNetLemmatizer
import re
letters = re.compile('[a-zA-Z]')
def df_to_html(df):
display_html(HTML(df.to_html()))
def load_gutenberg_book(url, char_limit=10000, min_len_of_sections=40):
"""
Returns a list of paragraphs in the book.
url: A url from Project Gutenberg.
    char_limit: Number of characters of the book to read.
min_len_of_sections: Each paragraph must be at least this many characters long.
"""
    book = urllib.request.urlopen(url)
    # urlopen returns bytes in Python 3, so decode before splitting into paragraphs.
    book_text = book.read(char_limit if char_limit else None).decode("utf-8", errors="ignore")
result = []
for text in book_text[:char_limit].split("\r\n\r\n"):
if len(text) >= min_len_of_sections:
clean_text = text.replace("\r\n", " ").strip()
result.append(clean_text)
    # Drop the first few sections, which hold the Project Gutenberg header.
    start_position = len(result) if len(result) < 6 else 6
    return result[start_position:]
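# Hypothetical usage (not run here): fetch a book straight from Project Gutenberg
# and keep only the longer paragraphs, e.g.
#   paragraphs = load_gutenberg_book("https://www.gutenberg.org/files/11/11-0.txt")
# The URL is only an example; the rest of this notebook loads books from local
# files instead (see books_to_pandas below).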
def get_text(path):
"""
Handle all the weird ways books are encoded.
"""
encoding_options = "ascii utf-8 utf-16 utf-32 utf-16-be utf-16-le utf-32-be utf-32-le".split()
for encoding in encoding_options:
try:
with open(path, encoding=encoding) as book:
return book.read()
except UnicodeDecodeError:
continue
    raise ValueError("Could not decode {} with any of the tried encodings".format(path))
def extract_term(term_indicator, text, default=None, max_term_size=75):
term_start = text.find(term_indicator)
# If not found, return default.
if term_start == -1:
term = default
else:
term_end = text.find("\n", term_start)
term = text[term_start+len(term_indicator):term_end].strip()
if term and (len(term) > max_term_size):
term = default
return term
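# For example (hypothetical header text):
#   extract_term("Title:", "Title: Alice's Adventures in Wonderland\nAuthor: Lewis Carroll")
# returns "Alice's Adventures in Wonderland"; an unmatched indicator returns the default.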
def get_author_and_title(book_text, title_case=True):
title = extract_term("Title:", book_text, default=None)
author = extract_term("Author:", book_text, default=None)
    # Fall back to other common ways the author line is formatted.
for term_indicator in ["\n\nby ", "\n\nOF ", "\nOF\n"]:
if author is None:
author = extract_term(term_indicator, book_text[:15000], max_term_size=25)
if title_case and title and author:
title, author = title.title(), author.title()
return title, author
def locate_beginning_of_text(title, author, text):
    # Skip past the "START OF THIS PROJECT GUTENBERG ..." marker line.
    location = text.find("START OF THIS PROJECT GUTENBERG")
    if location >= 0:
        location += 20
    else:
        # Marker not found; fall back to the first mention of the title or author.
        if title:
            location = text.find(title)
        if author:
            location = text.find(author)
    return location
def locate_end_of_text(text):
f = text.find
search_terms = ["End of Project Gutenberg",
"END OF THIS PROJECT GUTENBERG EBOOK",
"END OF THE PROJECT GUTENBERG EBOOK",
"End of the Project Gutenberg Etext"]
    location = max(f(term) for term in search_terms)
    if location < 0:
        print("Could not find an end-of-text marker; keeping the full text.")
        location = None
return location
def parse_book(book_text, min_paragraph_characters=100):
"""
Given the text of a book, returns a list of dictionaries with the keys:
    {title, author, contents, part}
"""
parsed_book_paragraphs = []
title, author = get_author_and_title(book_text)
text_starts = locate_beginning_of_text(title, author, book_text)
text_ends = locate_end_of_text(book_text)
book_paragraphs = book_text[text_starts:text_ends].split("\n\n")
for paragraph_number, raw_paragraph in enumerate(book_paragraphs):
paragraph = raw_paragraph.replace("\n", " ").strip()
if (len(paragraph) < min_paragraph_characters) or not re.search(letters, paragraph):
continue
if "gutenberg" in paragraph.lower() or "chapter" in paragraph.lower():
continue
book_data = {"title": title,
"author": author,
"contents": paragraph,
"part": paragraph_number}
parsed_book_paragraphs.append(book_data)
return parsed_book_paragraphs
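# Each returned entry is a plain dict, roughly of this shape (hypothetical values):
#   {"title": "Alice'S Adventures In Wonderland",
#    "author": "Lewis Carroll",
#    "contents": "Alice was beginning to get very tired of sitting by her sister...",
#    "part": 12}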
def get_list_of_book_paths(book_directory):
return list(glob.iglob(book_directory + '/*.txt'))
def books_to_pandas(book_directory, min_paragraph_characters=100):
paragraphs = []
for filename in get_list_of_book_paths(book_directory):
book_text = get_text(filename)
parsed_book = parse_book(book_text, min_paragraph_characters)
paragraphs.extend(parsed_book)
return pd.DataFrame(paragraphs)
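# The resulting DataFrame has one row per paragraph and one column each for the
# title, author, contents and part fields (previewed with books.head() below).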
class LemmaTokenizer(object):
def __init__(self):
self.wnl = WordNetLemmatizer()
def __call__(self, doc):
return [self.wnl.lemmatize(t) for t in word_tokenize(doc)]
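# Note: word_tokenize and WordNetLemmatizer rely on NLTK data packages; if they
# are missing, downloading them once should be enough, e.g.
#   import nltk; nltk.download('punkt'); nltk.download('wordnet')
# Without a POS tag, lemmatize() treats every token as a noun, so
# LemmaTokenizer()("The cats were running") gives roughly
# ['The', 'cat', 'were', 'running'].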
def cosine_similarity(new_docs, old_docs):
"""
Returns a similarity matrix where the first row is an array of
similarities of the first new_doc compared with each of the old
docs.
"""
    # Rows produced by TfidfVectorizer are L2-normalised, so this sparse dot
    # product is exactly the cosine similarity.
    return new_docs * old_docs.T
def find_closest_matches(similarity_matrix, n_matches_to_return=1):
"""
Expects a dense array of the form [[1., .5, .2],
[.3, 1., .1],
[.2, .4, 1.]]
where rows correspond to similarities.
"""
top_indices = np.apply_along_axis(func1d=lambda x: x.argsort()[-n_matches_to_return:][::-1],
axis=1,
arr=similarity_matrix)
return top_indices
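# Quick sanity check on the toy matrix from the docstring above: the most similar
# column of each row is on the diagonal, so the expected output is [[0], [1], [2]].
#   toy = np.array([[1., .5, .2], [.3, 1., .1], [.2, .4, 1.]])
#   find_closest_matches(toy, n_matches_to_return=1)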
simple_cache = {}
def search_book(paragraph, book_title, books, n_results=10, print_results=False, return_title=False):
book_title_list = book_title if isinstance(book_title, (list, tuple)) else (book_title,)
select_books = books[books.title.isin(book_title_list)].reset_index()
contents = select_books.contents
    # Cache the fitted vectorizer per selection; use a tuple so the key is hashable
    # even when a list of titles is passed in.
    cache_key = tuple(book_title_list)
    if cache_key not in simple_cache:
        vectorizer = TfidfVectorizer(max_df=.7, min_df=.0001, tokenizer=LemmaTokenizer()).fit(contents)
        simple_cache[cache_key] = {"vectorizer": vectorizer,
                                   "vect_book": vectorizer.transform(contents)}
    vectorizer = simple_cache[cache_key]["vectorizer"]
    vect_book = simple_cache[cache_key]["vect_book"]
vect_paragraph = vectorizer.transform([paragraph])
    # Brute-force nearest neighbours; with L2-normalised tf-idf vectors the
    # Euclidean ranking is the same as the cosine ranking.
    nbrs = NearestNeighbors(n_neighbors=n_results, algorithm='brute').fit(vect_book)
    distances, indices = nbrs.kneighbors(vect_paragraph)
    search_results = list(zip(distances[0],
                              select_books.iloc[indices[0]].contents,
                              select_books.iloc[indices[0]].title))
if print_results:
for dist, text, title in search_results:
print(dist)
print(text)
print("\n")
if return_title:
return search_results
return [(dist, text) for dist, text, title in search_results]
def compare_book_paragraphs(book_title, books, n_close_matches=10, same_book_in_corpus=True):
results = []
book_title_list = book_title if isinstance(book_title, (list, tuple)) else (book_title,)
select_books = books[books.title.isin(book_title_list)].reset_index()
for paragraph in select_books.contents:
        if same_book_in_corpus:
            # The closest match is the paragraph itself, so keep the second result.
            result = search_book(paragraph, book_title, books, n_results=2)[1]
else:
result = search_book(paragraph, book_title, books, n_results=1)[0]
results.append([paragraph] + list(result))
df = pd.DataFrame(results, columns=["Text 1", "Distance", "Text 2"])
df.sort_values("Distance", inplace=True)
print("Perfect matches")
perfect_matches = df[df.Distance == 0].drop_duplicates()
df_to_html(perfect_matches)
print("\n")
print ("Close matches")
top_close_matches = df[df.Distance != 0].drop_duplicates("Distance").head(n_close_matches)
df_to_html(top_close_matches)
def compare_book_to_books(book_title, other_book_titles, books, n_close_matches=20):
results = []
if book_title in other_book_titles:
other_book_titles = tuple([title for title in other_book_titles if title != book_title])
select_books = books[books.title.isin(other_book_titles)].reset_index()
book = books[books.title == book_title].reset_index()
for paragraph in book.contents:
result = search_book(paragraph, other_book_titles, books, n_results=1, return_title=True)[0]
results.append([paragraph] + list(result))
df = pd.DataFrame(results, columns=["Text 1", "Distance", "Text 2", "Title"])
df.sort_values("Distance", inplace=True)
print("Perfect matches")
perfect_matches = df[df.Distance == 0].drop_duplicates()
df_to_html(perfect_matches)
print("\n")
print ("Close matches")
close_matches = df[df.Distance != 0]
df_to_html(close_matches.drop_duplicates("Distance").head(n_close_matches))
return close_matches
In [2]:
books = books_to_pandas("popular_books", min_paragraph_characters=1)
In [3]:
books.head()
Out[3]:
In [4]:
books.title.value_counts()
Out[4]:
Number of comparisons ~ 5000
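The rough counts quoted in this and the later headings can be reproduced from the books DataFrame, assuming a "comparison" means one (query paragraph, candidate paragraph) pair; the exact figures depend on which files sit in popular_books. A minimal sketch:
alice = "Alice'S Adventures In Wonderland"
n_alice = (books.title == alice).sum()   # paragraphs in one book
n_total = len(books)                     # paragraphs in the whole corpus
print("one query vs. one book: ", n_alice)              # ~ 5 thousand
print("one book vs. itself:    ", n_alice * n_alice)    # ~ 25 million
print("one book vs. all books: ", n_alice * n_total)    # ~ 500 million
print("all books vs. all books:", n_total * n_total)    # > 13 billion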
In [5]:
%%time
paragraph = "alice doesn't know which way to go"
book_title = "Alice'S Adventures In Wonderland"
search_book(paragraph, book_title, books, n_results=5, print_results=True)
In [6]:
%%time
paragraph = "queen says off with his or her head"
book_title = "Alice'S Adventures In Wonderland"
search_book(paragraph, book_title, books, n_results=5, print_results=True)
Number of comparisons ~ 25 million
In [7]:
# See the entire string when printing a data frame
# (older pandas versions used -1 instead of None here).
pd.set_option('display.max_colwidth', None)
In [8]:
%%time
compare_book_paragraphs(book_title, books)
Number of comparisons ~ 500 million
In [9]:
all_book_titles = books.title.unique().tolist()
In [10]:
%%time
book_title = "Alice'S Adventures In Wonderland"
close_matches = compare_book_to_books(book_title, all_book_titles, books)
In [11]:
close_matches[:100].Title.value_counts()
Out[11]:
In [12]:
%%time
book_title = "The Adventures Of Sherlock Holmes"
close_matches = compare_book_to_books(book_title, all_book_titles, books)
In [13]:
close_matches[:200].Title.value_counts()
Out[13]:
Number of comparisons > 13 billion
In [18]:
%%time
def compare_all_books(books, n_close_matches=20):
    """
    For each paragraph of each book, find the single closest paragraph from any
    other book and return every pair, sorted by distance (n_close_matches is unused).
    """
vectorizer = TfidfVectorizer(max_df=.7, min_df=.0001, tokenizer=LemmaTokenizer()).fit(books.contents)
vect_book = vectorizer.transform(books.contents)
results = {"book_1_title":[],
"book_1_paragraph":[],
"book_2_title":[],
"book_2_paragraph":[],
"paragraph_distance":[]}
book_titles = books.title.dropna().unique().tolist()
for book_title in book_titles:
book_mask = (books.title == book_title).values
other_book_mask = ~book_mask
nbrs = NearestNeighbors(n_neighbors=1, algorithm='brute').fit(vect_book[other_book_mask])
distances, indices = nbrs.kneighbors(vect_book[book_mask])
book_content = books.loc[book_mask, "contents"].tolist()
results["book_1_paragraph"].extend(book_content)
matches = books[other_book_mask].contents.values[indices.flatten()]
results["book_2_paragraph"].extend(matches)
book_1_title = [book_title] * sum(book_mask)
results["book_1_title"].extend(book_1_title)
book_2_title = books[other_book_mask].title.values[indices.flatten()]
results["book_2_title"].extend(book_2_title)
results["paragraph_distance"].extend(distances.flatten())
results_sorted_by_distance = pd.DataFrame(results).sort_values("paragraph_distance")
return results_sorted_by_distance
results = compare_all_books(books)
In [55]:
results[results.paragraph_distance == 0].drop_duplicates()[:100]
Out[55]:
In [54]:
results[results.paragraph_distance > 0].drop_duplicates("paragraph_distance")[:100]
Out[54]: